#+##############################################################################
#
# T2h_l2h.pm: interface to LaTeX2HTML
#
#    Copyright (C) 1999, 2000, 2003, 2005, 2006, 2009 Free Software Foundation, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# This code was taken from the main texi2html file in 2006.
# Certainly originally written by Olaf Bachmann.
#
#-##############################################################################

require 5.0;
use strict;

package Texi2HTML::LaTeX2HTML;
use Cwd;


# latex2html conversions consist of three stages:
# 1) to latex: Put "latex" code into a latex file 
#                    (init, to_latex, finish_to_latex)
# 2) to html: Use latex2html to generate corresponding html code and images
#                    (to_html)
# 3) from html: Extract generated code and images from latex2html run
#                    (init_from_html, do_tex)

# init l2h defaults for files and names

# Cache of latex2html results: associates a latex text with the html obtained
# for it.  This is a package variable, not a "my" variable.
# FIXME there is no reason for this variable to be global
# NOTE(review): the cache file written by store_cache assigns to %l2h_cache
# and is loaded with "do" in init_cache; a file loaded that way cannot see
# "my" variables, which may be the reason it is global -- confirm before
# changing.
use vars qw(
            %l2h_cache
           );

# Names derived from the document name and destination directory; all of them
# are set in init.
my ($l2h_name, $l2h_latex_file, $l2h_cache_file, $l2h_html_file, $l2h_prefix);

# Holds the status of latex2html operations.  0 means that l2h is not usable
# (not initialized, or an error occurred); init sets it to 1 on success and
# latex2html overwrites it with the result of the first and second stages.
my $status = 0;

my $debug;       # set in init from $Texi2HTML::THISDOC{'debug_l2h'}
my $verbose;     # set in init from the VERBOSE configuration value
my $docu_rdir;   # destination directory, '' when undefined (set in init)
my $docu_name;   # base name of the document file (set in init)

# init_from_html
# Both counters are reset in init, incremented in do_tex and reported in
# finish.
my $extract_error_count;
my $invalid_counter_count;

# change_image_file_names
my %l2h_img;            # associate src file to destination file
                        # such that files are not handled twice
my $image_count;        # number used in the name of the next destination
                        # image file

# do_tex
my $html_output_count = 0;   # number of html texts output by do_tex

##########################
#
# First stage: Generation of Latex file
# Initialize with: init
# Add content with: to_latex ($text) --> HTML placeholder comment
# Finish with: finish_to_latex
#

# Text written at the beginning of the latex file by init.  The here-document
# is interpolated, hence the doubled backslashes.
my $l2h_latex_preamble = <<EOT;
% This document was automatically generated by the l2h extenstion of texi2html
% DO NOT EDIT !!!
\\documentclass{article}
\\usepackage{html}
\\begin{document}
EOT

# Text written at the end of the latex file by finish_to_latex.
my $l2h_latex_closing = <<EOT;
\\end{document}
EOT

# All the variables below are reset in init.
my %l2h_to_latex = ();         # associate a latex text with its index in the
                               # html result array.
my @l2h_to_latex = ();         # associate an index with the original latex
                               # text (reverse of %l2h_to_latex).
my $latex_count = 0;           # number of distinct latex texts stored; also
                               # the last index handed out by to_latex
my $latex_converted_count = 0; # number of latex texts passed through latex2html
my $to_latex_count = 0;        # total number of latex texts processed
my $cached_count = 0;          # number of latex texts found in the cache
%l2h_cache = ();               # the cache hash. Associate latex text with
                               # html from the previous run
my @l2h_from_html;             # array of resulting html, indexed like
                               # @l2h_to_latex

my %global_count = ();         # associate "<command name>_<counter>" with the
                               # index in the html result array

# Reset the module state and prepare the first stage for a new document.
#
# Sets $status to 1 if l2h could be initialized properly, and leaves it at 0
# otherwise, that is when the output goes to the null device or when the
# latex file cannot be opened for writing.
#
# Unless L2H_SKIP is set, the latex file is opened (handle L2H_LATEX) and the
# latex preamble is written to it.  The cache of previous results is loaded
# when L2H_SKIP is undefined or true; an L2H_SKIP that is defined but false
# disables the cache.
sub init()
{
    # forget everything about a previously processed document
    %l2h_to_latex = ();
    @l2h_to_latex = ();
    $latex_count = 0;
    $latex_converted_count = 0;
    $to_latex_count = 0;
    $cached_count = 0;
    %l2h_cache = ();
    @l2h_from_html = ();
    %global_count = ();
    $extract_error_count = 0;
    $invalid_counter_count = 0;
    %l2h_img = ();
    $image_count = 1;
    $html_output_count = 0;
    $status = 0;

    # nothing is converted when the output is sent to the null device
    return if ($Texi2HTML::Config::null_device_file{$Texi2HTML::THISDOC{'filename'}->{'top'}});

    $docu_name = $Texi2HTML::THISDOC{'file_base_name'};
    $docu_rdir = $Texi2HTML::THISDOC{'destination_directory'};
    $docu_rdir = '' if (!defined($docu_rdir));
    $l2h_name =  "${docu_name}_l2h";
    $l2h_latex_file = "$docu_rdir${l2h_name}.tex";
    $l2h_cache_file = "${docu_rdir}${docu_name}-l2h_cache.pm";
    # destination dir -- generated images are put there, should be the same
    # as dir of enclosing html document --
    $l2h_html_file = "$docu_rdir${l2h_name}.html";
    $l2h_prefix = "${l2h_name}_";
    $debug = $Texi2HTML::THISDOC{'debug_l2h'};
    $verbose = Texi2HTML::Config::get_conf('VERBOSE');

    my $skip = Texi2HTML::Config::get_conf('L2H_SKIP');
    unless ($skip)
    {
        # three-argument open: the mode is never taken from the file name,
        # whatever characters the document name contains
        unless (open(L2H_LATEX, '>', $l2h_latex_file))
        {
            main::document_error ("l2h: Can't open latex file '$l2h_latex_file' for writing: $!");
            return;
        }
        warn "# l2h: use ${l2h_latex_file} as latex file\n" if ($verbose);
        print L2H_LATEX $l2h_latex_preamble;
    }
    # open the database that holds cached text
    init_cache() if (!defined($skip) or $skip);
    $status = 1;
}


# Register the latex text of an @-command (first stage).
#
# Arguments: the command name ('tex', 'math', ...), the text, and the counter
# identifying this occurrence of the command.
# A text that was not seen before gets the next index; its html is taken from
# the cache if possible, otherwise the text is written to the latex file
# between two html comments carrying the index (unless L2H_SKIP is set).
# The index is recorded for "<command>_<counter>", to be used by do_tex.
# Returns nothing when l2h is not usable, 1 otherwise.
sub to_latex($$$)
{
    my ($command, $text, $counter) = @_;
    return unless ($status);

    if ($command eq 'math')
    {
        $text = '$' . $text . '$';
    }
    elsif ($command eq 'tex')
    {
        $text = $text . ' ';
    }
    # trailing whitespace is not significant
    $text =~ s/\s*$//;
    $to_latex_count++;

    # is the same text already registered?
    my $index = $l2h_to_latex{$text};
    if (!$index)
    {
        $index = ++$latex_count;
        my $html = from_cache($text);
        if (!defined($html))
        {
            # not in the cache, latex2html has to convert it
            $latex_converted_count++;
            if (!Texi2HTML::Config::get_conf('L2H_SKIP'))
            {
                my $fragment = "\\begin{rawhtml}\n\n"
                             . "<!-- l2h_begin $l2h_name $index -->\n"
                             . "\\end{rawhtml}\n"
                             . "$text\n"
                             . "\\begin{rawhtml}\n"
                             . "<!-- l2h_end $l2h_name $index -->\n\n"
                             . "\\end{rawhtml}\n";
                print L2H_LATEX $fragment;
            }
        }
        else
        {
            # the cached result goes directly in the html result array
            $cached_count++;
            $l2h_from_html[$index] = $html;
        }
        $l2h_to_latex[$index] = $text;
        $l2h_to_latex{$text} = $index;
    }
    $global_count{"${command}_$counter"} = $index;
    return 1;
}

# Print the closing into the latex file and close it (end of first stage).
#
# Nothing is written when L2H_SKIP is set.
# Returns 1 if there is at least one latex text to handle.
# Returns 0 if the latex file could not be written completely, or if there
# was no @tex nor @math at all; in the latter case finish is called first.
sub finish_to_latex()
{
    my $reused = $to_latex_count - $latex_converted_count - $cached_count;
    unless (Texi2HTML::Config::get_conf('L2H_SKIP'))
    {
        print L2H_LATEX $l2h_latex_closing;
        # buffered write errors show up at close time; latex2html must not
        # be run on a truncated file
        unless (close (L2H_LATEX))
        {
            main::document_error ("l2h: error writing latex file '$l2h_latex_file': $!");
            return 0;
        }
    }
    warn "# l2h: finished to latex ($cached_count cached, $reused reused, $latex_converted_count to process)\n" if ($verbose);
    unless ($latex_count)
    {
        # no @tex nor @math
        finish();
        return 0;
    }
    return 1;
}

###################################
# Second stage: Use latex2html to generate corresponding html code and images
#
# to_html():
#   Call latex2html on $l2h_latex_file, unless L2H_SKIP is set or there is
#   nothing to convert.
#   Images and html file(s) are prefixed with $l2h_prefix and put in
#   $docu_rdir when it is not empty.
#   Return 1, on success or when the latex2html run is skipped
#          0, if the directory used contains a dot or if latex2html fails
#
sub to_html()
{
    # when there are no tex constructs to convert (happens in case everything
    # comes from the cache), there is no latex2html run
    if (Texi2HTML::Config::get_conf('L2H_SKIP') or ($latex_converted_count == 0))
    {
        warn "# l2h: skipping latex2html run\n" if ($verbose);
        return 1;
    }

    my $tmp_dir = Texi2HTML::Config::get_conf('L2H_TMP');

    # Check for dot in directory where dvips will work: the l2h_tmp dir if
    # there is one, the current directory otherwise
    my $dot_message;
    if ($tmp_dir)
    {
        $dot_message = "l2h: l2h_tmp dir contains a dot." if ($tmp_dir =~ /\./);
    }
    elsif (cwd() =~ /\./)
    {
        $dot_message = "l2h: current dir contains a dot.";
    }
    if (defined($dot_message))
    {
        # no attempt is made at working around the problem
        main::document_warn ($dot_message);
        return 0;
    }

    my $program = Texi2HTML::Config::get_conf('L2H_L2H');
    my @options = ();

    # use init file, if specified and readable
    my $init_file = Texi2HTML::Config::get_conf('L2H_FILE');
    if (defined($init_file) and $init_file ne '' and -f $init_file and -r $init_file)
    {
        push @options, "-init_file $init_file";
    }
    # set output dir
    if ($docu_rdir ne '')
    {
        push @options, "-dir $docu_rdir";
    }
    else
    {
        push @options, "-no_subdir";
    }
    # use l2h_tmp, if specified
    if (defined($tmp_dir) and $tmp_dir ne '')
    {
        push @options, "-tmp $tmp_dir";
    }
    # use a given html version if specified
    my $html_version = Texi2HTML::Config::get_conf('L2H_HTML_VERSION');
    if (defined($html_version) and $html_version ne '')
    {
        push @options, "-html_version $html_version";
    }
    # options we want to be sure of
    push @options, "-address 0 -info 0 -split 0 -no_navigation -no_auto_link";
    push @options, "-prefix $l2h_prefix $l2h_latex_file";

    my $command = join(' ', $program, @options);

    warn "# l2h: executing '$command'\n" if ($verbose);
    if (system($command))
    {
        main::document_error ("l2h: '${command}' did not succeed");
        return 0;
    }
    warn "# l2h: latex2html finished successfully\n" if ($verbose);
    return 1;
}

##########################
# Third stage: Extract generated contents from latex2html run
# Initialize with: init_from_html
#   open $l2h_html_file for reading
#   reads in contents into array indexed by numbers
#   return 1,  on success -- 0, otherwise
# Finish with: finish
#   stores the cache and removes the temporary files if L2H_CLEAN is set


# the images generated by latex2html have names like ${docu_name}_l2h_img?.png
# they are moved to ${docu_name}_?.png, and html is changed accordingly.

# FIXME is it really necessary to bother doing that? Looks like an unneeded
# complication to me (pertusus, 2009), and it could go bad if there is some
# SRC="(.*?)" in the text (though the regexp could be made more specific).

# Argument: html text generated by latex2html.
# Returns the text with the SRC="..." image file names replaced by the names
# of the renamed files.
#
# %l2h_img associates a src file to its destination file such that files are
# not handled twice.  With debugging set the image is copied instead of being
# renamed.
# An image is left alone, and a warning is issued, if it has no extension, if
# its extension is the document extension, or if it cannot be renamed/copied.
sub change_image_file_names($)
{
    my $content = shift;
    my @images = ($content =~ /SRC="(.*?)"/g);
    my $document_extension = Texi2HTML::Config::get_conf('EXTENSION');

    foreach my $src (@images)
    {
        my $dest = $l2h_img{$src};
        unless ($dest)
        {
            my ($suffix) = ($src =~ /.*\.(.*)$/);
            if (!defined($suffix)
                or (defined($document_extension) and $suffix eq $document_extension))
            { # A warning when the image extension is the same than the
              # document extension. copying the file could result in
              # overwriting an output file (almost surely if the default
              # texi2html file names are used).
                main::document_warn ("L2h image $src has invalid extension");
                next;
            }
            my $ext = ".$suffix";
            # never overwrite an existing file
            while (-e "$docu_rdir${docu_name}_${image_count}$ext")
            {
                $image_count++;
            }
            $dest = "${docu_name}_${image_count}$ext";
            my $done;
            if ($debug)
            {
                # not portable, but only used with debug.  The list form
                # of system does not go through the shell, so file names
                # with spaces or shell metacharacters are passed verbatim.
                $done = (system('cp', '-f', "$docu_rdir$src", "$docu_rdir$dest") == 0);
            }
            else
            {
                $done = rename ("$docu_rdir$src", "$docu_rdir$dest");
            }
            unless ($done)
            {
                # keep the original name in the html rather than referring
                # to a file that was not created
                main::document_warn ("l2h: could not move image $docu_rdir$src to $docu_rdir$dest: $!");
                next;
            }
            $l2h_img{$src} = $dest;
        }
        # \Q...\E: the file name is a literal string, not a pattern (the
        # dot of the extension would otherwise match any character)
        $content =~ s/SRC="\Q$src\E"/SRC="$dest"/g;
    }
    return $content;
}

# Read the html file generated by latex2html and collect the html associated
# with each latex text (beginning of the third stage).
#
# The html found between the "l2h_begin <name> <index>" and
# "l2h_end <name> <index>" comments is stored, after the image file names
# have been changed, in @l2h_from_html at <index> and in the cache.
# Returns 1 on success or if there was nothing to convert, 0 if the html file
# cannot be opened or if a closing comment is missing.
sub init_from_html()
{
    # when there are no tex constructs to convert (happens in case everything
    # comes from the cache), the html file that was generated by previous
    # latex2html runs isn't reused.
    return 1 if ($latex_converted_count == 0);

    my $html_fh;
    unless (open($html_fh, '<', $l2h_html_file))
    {
        main::document_warn ("l2h: Can't open $l2h_html_file for reading");
        return 0;
    }
    warn "# l2h: use $l2h_html_file as html file\n" if ($verbose);

    my $html_converted_count = 0;   # number of html resulting texts
                                    # retrieved in the file

    # the name comes from the document file name, it is matched literally
    my $begin_regexp = qr/!-- l2h_begin \Q$l2h_name\E ([0-9]+) --/;

    while (my $h_line = <$html_fh>)
    {
        next unless ($h_line =~ $begin_regexp);

        my $count = $1;
        my $end_regexp = qr/!-- l2h_end \Q$l2h_name\E $count --/;
        my $h_content = '';
        my $h_end_found = 0;
        while ($h_line = <$html_fh>)
        {
            if ($h_line =~ $end_regexp)
            {
                $h_end_found = 1;
                chomp $h_content;
                chomp $h_content;
                $html_converted_count++;
                # transform image file names and copy image files
                $h_content = change_image_file_names($h_content);
                # store result in the html result array
                $l2h_from_html[$count] = $h_content;
                # also add the result in cache hash, unless the index found
                # in the file doesn't correspond to a known latex text
                $l2h_cache{$l2h_to_latex[$count]} = $h_content
                    if (defined($l2h_to_latex[$count]));
                last;
            }
            $h_content = $h_content.$h_line;
        }
        unless ($h_end_found)
        { # couldn't find the closing comment. Certainly a bug.
            main::msg_debug ("l2h: l2h_end $l2h_name $count not found");
            close($html_fh);
            return 0;
        }
    }

    # Not the same number of converted elements and retrieved elements
    if ($latex_converted_count != $html_converted_count)
    {
        main::msg_debug ("l2h: waiting for $latex_converted_count elements found $html_converted_count");
    }

    warn "# l2h: Got $html_converted_count of $latex_count html contents\n"
        if ($verbose);

    close($html_fh);
    return 1;
}

# $html_output_count = 0;   # html text outputed in html result file

# Called each time a construct handled by latex2html is encountered, returns
# the corresponding html.
#
# Arguments: the command name, the counter for this occurrence of the command
# and the state (only $state->{'remove_texi'} is looked at).  The prototype
# has a fourth argument that is not used here.
# Returns nothing if l2h is not usable, and an empty string (or an html
# comment describing the problem, with debugging set) if no valid index is
# associated with the command and counter.
sub do_tex($$$$)
{
    my ($style, $counter, $state) = @_;
    return unless ($status);

    my $index = $global_count{"${style}_$counter"};

    ################################## begin debug section (incorrect counts)
    unless (defined($index))
    {
        # nothing was registered for this command and counter
        $invalid_counter_count++;
        main::msg_debug ("l2h: undefined count for ${style}_$counter");
        return ($debug ? ("<!-- l2h: ". __LINE__ . " undef count for ${style}_$counter -->") : '');
    }
    if (($index <= 0) or ($index > $latex_count))
    {
        # index out of range
        $invalid_counter_count++;
        main::msg_debug ("l2h: Request of $index content which is out of valide range [0,$latex_count)");
        return ($debug ? ("<!-- l2h: ". __LINE__ . " out of range count $index -->") : '');
    }
    ################################## end debug section (incorrect counts)

    # the index is valid.  With debugging set the result is enclosed in
    # html comments.
    my $output = '';
    $output .= "<!-- l2h_begin $l2h_name $index -->" if ($debug);
    if (!defined($l2h_from_html[$index]))
    {
        # if the result is not in @l2h_from_html, there is an error somewhere.
        $extract_error_count++;
        main::msg_debug ("l2h: can't extract content $index from html");
        # try simple (ordinary) substitution (without l2h)
        $output .= "<!-- l2h: ". __LINE__ . " use texi2html -->" if ($debug);
        $output .= main::substitute_text({}, undef, 'error in l2h', $l2h_to_latex[$index]);
    }
    else
    {
        $html_output_count++;
        # maybe we could also have something if simple_format
        # with Texi2HTML::Config::protect_text in case there
        # was some @math on a line passed through simple_format.
        # This would certainly be illegal texinfo, however.

        # when texi is removed the latex text is output as is, nothing
        # is protected
        $output .= ($state->{'remove_texi'} ? $l2h_to_latex[$index]
                                            : $l2h_from_html[$index]);
    }
    $output .= "<!-- l2h_end $l2h_name $index -->" if ($debug);
    return $output;
}

# Store results in the cache and remove temporary files (end of the third
# stage).
#
# With VERBOSE set, the errors counted in do_tex are reported.
# The files whose name begins with "$docu_rdir$l2h_name" are removed if
# L2H_CLEAN is set; a file that cannot be removed leads to a warning.
# Returns nothing if l2h is not usable, 1 otherwise.
sub finish()
{
    return unless($status);
    if ($verbose)
    {
        if ($extract_error_count + $invalid_counter_count)
        {
            warn "# l2h: finished from html ($extract_error_count extract and $invalid_counter_count invalid counter errors)\n";
        }
        else
        {
            warn "# l2h: finished from html (no error)\n";
        }
        if ($html_output_count != $latex_converted_count)
        { # this may happen if @-commands are collected at some places
          # but @-command at those places are not expanded later. For
          # example @math on @multitable lines.
             warn "# l2h: $html_output_count html outputed for $latex_converted_count converted\n";
        }
    }
    store_cache();
    if (Texi2HTML::Config::get_conf('L2H_CLEAN'))
    {
        warn "# l2h: removing temporary files generated by l2h extension\n"
            if $verbose;
        # the prefix is quoted in the pattern such that it is taken as is by
        # glob, even if it contains spaces
        foreach my $file (glob("\"$docu_rdir$l2h_name\"*"))
        {
            # unlink cannot remove a directory, don't try
            next if (-d $file and ! -l $file);
            unlink($file)
                or main::document_warn ("l2h: could not remove temporary file $file: $!");
        }
    }
    warn "# l2h: Finished\n" if $verbose;
    return 1;
}

# The driver of the end of the first stage and of the second stage.
#
# Does nothing if l2h is not usable.  Otherwise $status is set to the result
# of finish_to_latex, and, if it succeeded, to the result of to_html.
sub latex2html()
{
    return unless ($status);

    # end of first stage: finish the latex file
    $status = finish_to_latex();
    return unless ($status);

    # second stage: run latex2html
    $status = to_html();
    return unless ($status);
}


##############################
# stuff for l2h caching
#
# FIXME it is clear that l2h stuff takes very long compared with texi2html
# which is already quite long. However this also adds some complexity 

# I tried doing this with a dbm data base, but it did not store all
# keys/values. Hence, I did as latex2html does it

# Load the cache of a previous run, if there is a readable $l2h_cache_file.
#
# The cache file is perl code, written by store_cache, that fills
# %l2h_cache; it is executed with "do".
# NOTE(review): whatever code is in that file gets executed, the destination
# directory should not be writable by untrusted users.
sub init_cache
{
    return unless (-r $l2h_cache_file);

    # "do" searches a relative file name in @INC unless it begins with ./
    # or ../, and the current directory is not in @INC anymore in recent
    # perl versions.  An absolute file name is always used as is.
    require File::Spec;
    my $cache_path = File::Spec->rel2abs($l2h_cache_file);

    my $loaded = do $cache_path;
    unless ($loaded)
    {
        # $@ is set if the file could not be compiled, $! if it could not
        # be read
        my $reason = $@ || $! || 'no true value returned';
        main::document_error ("l2h: could not load $l2h_cache_file: $reason");
    }
}

# Store all the text obtained through latex2html in $l2h_cache_file.
#
# The cache file is perl code assigning each html text to
# $l2h_cache{latex text}, in the order of the sorted latex texts; it is read
# back by init_cache.  Nothing is written if there was no latex text at all.
# An error is reported if the file cannot be opened or written.
sub store_cache
{
    return unless $latex_count;
    my $cache_fh;
    unless (open($cache_fh, '>', $l2h_cache_file))
    {
        # $l2h_cache_file already begins with the destination directory
        main::document_error ("l2h: could not open $l2h_cache_file for writing: $!");
        return;
    }
    foreach my $latex_text (sort(keys(%l2h_cache)))
    {
        my $key = $latex_text;
        my $value = $l2h_cache{$latex_text};
        # Within q// and q|| a backslash followed by a backslash or by the
        # delimiter stands for that character, any other backslash is kept.
        # Backslashes are therefore doubled first, then the delimiter is
        # escaped, such that exactly the same strings are read back, even
        # with \\ in the text or with a backslash at the end of the text.
        $key =~ s/\\/\\\\/g;
        $key =~ s{/}{\\/}g;
        $value =~ s/\\/\\\\/g;
        $value =~ s/\|/\\|/g;
        print $cache_fh "\n\$l2h_cache_key = q/$key/;\n";
        print $cache_fh "\$l2h_cache{\$l2h_cache_key} = q|$value|;\n";
    }
    print $cache_fh "1;";
    # write errors may only show up when the file is closed
    close ($cache_fh)
        or main::document_error ("l2h: error writing $l2h_cache_file: $!");
}

# Look up the html cached for a latex text.
#
# Returns the cached html if there is some for the text and if every image
# it refers to (SRC="...") exists in the destination directory; returns
# undef otherwise.
sub from_cache($)
{
    my $latex_text = shift;
    my $html = $l2h_cache{$latex_text};
    return undef unless (defined($html));

    # the cached html is useless if one of its images is missing
    foreach my $image ($html =~ m/SRC="(.*?)"/g)
    {
        return undef unless (-e "$docu_rdir$image");
    }
    return $html;
}

1;
